# import sys
# !conda install --yes --prefix {sys.prefix} plotly
#!pip install holidays
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from plotly.offline import iplot
def plot_dataset(df, title):
    """Render an interactive plotly line chart of ``df['value']`` over its index.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime-like index and a 'value' column.
    title : str
        Figure title.
    """
    trace = go.Scatter(
        x=df.index,
        y=df.value,
        mode="lines",
        name="values",
        marker=dict(),
        text=df.index,
        line=dict(color="rgba(0,0,0, 0.3)"),
    )
    layout = dict(
        title=title,
        xaxis=dict(title="Date", ticklen=5, zeroline=False),
        yaxis=dict(title="Value", ticklen=5, zeroline=False),
    )
    # Single-trace figure rendered inline (offline mode).
    iplot(dict(data=[trace], layout=layout))
import pandas as pd

# Load the PJM East hourly energy-consumption series and index it by timestamp.
df = pd.read_csv('./archive/PJME_hourly.csv')
df = df.set_index(['Datetime'])
df.index = pd.to_datetime(df.index)
# Ensure chronological order before any time-based splitting.
# Fix: `Index.is_monotonic` was deprecated and removed in pandas 2.0;
# `is_monotonic_increasing` is the supported spelling with the same meaning.
if not df.index.is_monotonic_increasing:
    df = df.sort_index()
df = df.rename(columns={'PJME_MW': 'value'})
plot_dataset(df, title='PJM East (PJME) Region: estimated energy consumption in Megawatts (MW)')
df
def generate_time_lags(df, n_lags):
    """Return a copy of ``df`` extended with columns lag1..lag<n_lags>.

    Each ``lag{k}`` column holds ``value`` shifted down by ``k`` rows; the
    first ``n_lags`` rows (which would contain NaNs) are dropped.
    """
    lagged = df.copy()
    for lag in range(1, n_lags + 1):
        lagged[f"lag{lag}"] = lagged["value"].shift(lag)
    return lagged.iloc[n_lags:]
# Number of lagged observations per row.
# NOTE(review): `input_dim` is reassigned to the feature-column count before
# training (see the hyperparameter cell), and `df_generated` is not used
# further below — this cell is a demonstration of the lag generator.
input_dim = 100
df_generated = generate_time_lags(df, input_dim)
df_generated
# Derive calendar features from the datetime index.
# Fix: `DatetimeIndex.week` was deprecated and removed in pandas 2.0; the
# supported replacement is `isocalendar().week`, cast back to a plain int so
# the dtype matches what the old attribute produced.
df_features = (
    df
    .assign(hour = df.index.hour)
    .assign(day = df.index.day)
    .assign(month = df.index.month)
    .assign(day_of_week = df.index.dayofweek)
    .assign(week_of_year = df.index.isocalendar().week.astype(int))
)
df_features
def onehot_encode_pd(df, col_name):
    """One-hot encode ``col_name`` and drop the original column.

    Returns a new frame: dummy columns (prefixed with the column name) are
    concatenated onto ``df`` and the source column is removed.
    """
    encoded = pd.get_dummies(df[col_name], prefix=col_name)
    combined = pd.concat([df, encoded], axis=1)
    return combined.drop(columns=[col_name])
# Replace each integer calendar column with its one-hot expansion.
columns = ['month','day','day_of_week','week_of_year']
for i in columns:
    df_features = onehot_encode_pd(df_features, i)
df_features
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
def onehot_encode(df, onehot_columns):
    """One-hot encode ``onehot_columns`` with scikit-learn's ColumnTransformer.

    The listed columns are encoded (first category dropped); every other
    column is passed through untouched. Returns the fit-transformed array.
    """
    transformer = ColumnTransformer(
        transformers=[('onehot', OneHotEncoder(drop='first'), onehot_columns)],
        remainder='passthrough',
    )
    return transformer.fit_transform(df)
# Demonstration of the sklearn-based encoder on the 'hour' column.
# NOTE(review): `onehot_encoded` is not referenced again below; the cyclical
# sin/cos encoding of 'hour' (next cell) is what actually feeds the model.
onehot_columns = ['hour']
onehot_encoded = onehot_encode(df_features, onehot_columns)
import numpy as np
def generate_cyclical_features(df, col_name, period, start_num=0):
    """Replace ``col_name`` with sin/cos features encoding its cyclic nature.

    Parameters
    ----------
    df : pandas.DataFrame
    col_name : str
        Column holding a cyclical quantity (e.g. hour of day).
    period : int
        Length of the cycle (e.g. 24 for hours, 12 for months).
    start_num : int, default 0
        Value at which the cycle begins (e.g. 1 for months).

    Returns a new frame with ``sin_<col>`` / ``cos_<col>`` columns added and
    the original column dropped.
    """
    # Fix: the original built lambdas that ignored their argument and closed
    # over the outer `df` — equivalent results here, but a refactoring trap.
    # Computing the columns directly is clearer and behaves identically.
    angle = 2 * np.pi * (df[col_name] - start_num) / period
    new_cols = {
        f'sin_{col_name}': np.sin(angle),
        f'cos_{col_name}': np.cos(angle),
    }
    return df.assign(**new_cols).drop(columns=[col_name])
# Encode hour-of-day as sin/cos so that 23:00 and 00:00 end up close in
# feature space; the other calendar columns were already one-hot encoded.
df_features = generate_cyclical_features(df_features, 'hour', 24, 0)
# df_features = generate_cyclical_features(df_features, 'day_of_week', 7, 0)
# df_features = generate_cyclical_features(df_features, 'month', 12, 1)
# df_features = generate_cyclical_features(df_features, 'week_of_year', 52, 0)
df_features
from datetime import date
import holidays
us_holidays = holidays.US()
def is_holiday(date):
    """Return 1 when ``date`` falls on a US public holiday, else 0.

    The hour is zeroed first so every hourly timestamp of a given day maps
    to the same lookup key before testing membership in ``us_holidays``.
    """
    midnight = date.replace(hour = 0)
    return int(midnight in us_holidays)
def add_holiday_col(df, holidays):
    """Return ``df`` with an ``is_holiday`` column (1/0 per index timestamp).

    Fix: the original ignored its ``holidays`` parameter and closed over the
    module-level ``is_holiday``/``us_holidays`` instead. The lookup now uses
    the container actually passed in — identical results for the call site
    below, which passes ``us_holidays``.
    """
    flags = df.index.to_series().apply(
        lambda ts: 1 if ts.replace(hour=0) in holidays else 0
    )
    return df.assign(is_holiday=flags)
# Flag every timestamp that falls on a US public holiday.
df_features = add_holiday_col(df_features, us_holidays)
df_features
from sklearn.model_selection import train_test_split
def feature_label_split(df, target_col):
    """Split ``df`` into features X (everything else) and label y (``target_col``).

    ``y`` is returned as a one-column DataFrame rather than a Series.
    """
    label = df[[target_col]]
    features = df.drop(columns=[target_col])
    return features, label
def train_val_test_split(df, target_col, test_ratio):
    """Chronological train/validation/test split (no shuffling).

    ``test_ratio`` of the rows become the test set; the same absolute share
    of the whole is then carved out of the remainder as validation, so the
    final proportions are (1 - 2*test_ratio) / test_ratio / test_ratio.
    """
    # Rescale so the validation slice of the *remaining* rows equals
    # test_ratio of the full frame.
    val_ratio = test_ratio / (1 - test_ratio)
    X, y = feature_label_split(df, target_col)
    X_rest, X_test, y_rest, y_test = train_test_split(X, y, test_size=test_ratio, shuffle=False)
    X_train, X_val, y_train, y_val = train_test_split(X_rest, y_rest, test_size=val_ratio, shuffle=False)
    return X_train, X_val, X_test, y_train, y_val, y_test
# 60/20/20 chronological split on the target column.
X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(df_features, 'value', 0.2)
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
# Scale features with statistics from the training split only.
X_train_arr = scaler.fit_transform(X_train)
X_val_arr = scaler.transform(X_val)
X_test_arr = scaler.transform(X_test)
# NOTE(review): the SAME scaler object is re-fit on y here, discarding the
# X statistics. The X arrays above were already transformed so they are
# unaffected, but after this line `scaler` holds the target's statistics.
y_train_arr = scaler.fit_transform(y_train)
y_val_arr = scaler.transform(y_val)
y_test_arr = scaler.transform(y_test)
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler, RobustScaler
def get_scaler(scaler):
    """Return a new, unfitted scaler instance selected by name.

    Parameters
    ----------
    scaler : str
        One of 'minmax', 'standard', 'maxabs', 'robust' (case-insensitive).

    Raises
    ------
    ValueError
        For an unknown name. (The original used ``dict.get`` and would raise
        an opaque ``TypeError: 'NoneType' object is not callable`` instead.)
    """
    scalers = {
        "minmax": MinMaxScaler,
        "standard": StandardScaler,
        "maxabs": MaxAbsScaler,
        "robust": RobustScaler,
    }
    try:
        return scalers[scaler.lower()]()
    except KeyError:
        raise ValueError(
            f"Unknown scaler '{scaler}'; expected one of {sorted(scalers)}"
        ) from None
# Fresh (unfitted) scaler kept for later use.
# NOTE(review): the arrays above were scaled with a MinMaxScaler, but this
# RobustScaler is what gets passed to format_predictions below — confirm
# that mismatch is intentional.
scaler = get_scaler('robust')
#!pip install torchvision
from torch.utils.data import TensorDataset, DataLoader
import torch
batch_size = 64
# Wrap the scaled numpy arrays as float32 tensors.
train_features = torch.Tensor(X_train_arr)
train_targets = torch.Tensor(y_train_arr)
val_features = torch.Tensor(X_val_arr)
val_targets = torch.Tensor(y_val_arr)
test_features = torch.Tensor(X_test_arr)
test_targets = torch.Tensor(y_test_arr)
train = TensorDataset(train_features, train_targets)
val = TensorDataset(val_features, val_targets)
test = TensorDataset(test_features, test_targets)
# drop_last=True keeps every batch exactly batch_size rows, which the
# `.view([batch_size, -1, n_features])` reshape in training relies on.
train_loader = DataLoader(train, batch_size=batch_size, shuffle=False, drop_last=True)
val_loader = DataLoader(val, batch_size=batch_size, shuffle=False, drop_last=True)
test_loader = DataLoader(test, batch_size=batch_size, shuffle=False, drop_last=True)
test_loader_one = DataLoader(test, batch_size=1, shuffle=False, drop_last=True)
import torch.nn as nn
import torch.nn.functional as F
class RNNModel(nn.Module):
    """Stacked vanilla RNN followed by a linear head.

    The last time step's hidden output is mapped to ``output_dim`` values.

    Parameters
    ----------
    input_dim : int
        Number of features per time step.
    hidden_dim : int
        Hidden-state size of each RNN layer.
    layer_dim : int
        Number of stacked RNN layers.
    output_dim : int
        Size of the prediction per sample.
    dropout_prob : float
        Dropout between stacked layers (only has effect when layer_dim > 1).
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, dropout_prob):
        super(RNNModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = nn.RNN(
            input_dim, hidden_dim, layer_dim, batch_first=True, dropout=dropout_prob
        )
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map x of shape (batch, seq_len, input_dim) to (batch, output_dim)."""
        # Fix: allocate the initial hidden state on the same device/dtype as
        # the input so the model also runs when x lives on a GPU. The
        # original's `.requires_grad_()` followed by `.detach()` was a no-op
        # and is dropped.
        h0 = torch.zeros(
            self.layer_dim, x.size(0), self.hidden_dim,
            device=x.device, dtype=x.dtype,
        )
        out, _ = self.rnn(x, h0)
        # Keep only the final time step before the fully connected head.
        out = out[:, -1, :]
        return self.fc(out)
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head.

    The last time step's hidden output is mapped to ``output_dim`` values.

    Parameters
    ----------
    input_dim : int
        Number of features per time step.
    hidden_dim : int
        Hidden-state size of each LSTM layer.
    layer_dim : int
        Number of stacked LSTM layers.
    output_dim : int
        Size of the prediction per sample.
    dropout_prob : float
        Dropout between stacked layers (only has effect when layer_dim > 1).
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, dropout_prob):
        super(LSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.lstm = nn.LSTM(
            input_dim, hidden_dim, layer_dim, batch_first=True, dropout=dropout_prob
        )
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map x of shape (batch, seq_len, input_dim) to (batch, output_dim)."""
        # Fix: allocate the initial hidden and cell states on the same
        # device/dtype as the input so the model also runs on a GPU. The
        # original's `.requires_grad_()` + `.detach()` pair was a no-op.
        h0 = torch.zeros(
            self.layer_dim, x.size(0), self.hidden_dim,
            device=x.device, dtype=x.dtype,
        )
        c0 = torch.zeros(
            self.layer_dim, x.size(0), self.hidden_dim,
            device=x.device, dtype=x.dtype,
        )
        out, (hn, cn) = self.lstm(x, (h0, c0))
        # Keep only the final time step before the fully connected head.
        out = out[:, -1, :]
        return self.fc(out)
class GRUModel(nn.Module):
    """Stacked GRU followed by a linear head.

    The last time step's hidden output is mapped to ``output_dim`` values.

    Parameters
    ----------
    input_dim : int
        Number of features per time step.
    hidden_dim : int
        Hidden-state size of each GRU layer.
    layer_dim : int
        Number of stacked GRU layers.
    output_dim : int
        Size of the prediction per sample.
    dropout_prob : float
        Dropout between stacked layers (only has effect when layer_dim > 1).
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, dropout_prob):
        super(GRUModel, self).__init__()
        self.layer_dim = layer_dim
        self.hidden_dim = hidden_dim
        self.gru = nn.GRU(
            input_dim, hidden_dim, layer_dim, batch_first=True, dropout=dropout_prob
        )
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map x of shape (batch, seq_len, input_dim) to (batch, output_dim)."""
        # Fix: allocate the initial hidden state on the same device/dtype as
        # the input so the model also runs on a GPU. The original's
        # `.requires_grad_()` + `.detach()` pair was a no-op and is dropped.
        h0 = torch.zeros(
            self.layer_dim, x.size(0), self.hidden_dim,
            device=x.device, dtype=x.dtype,
        )
        out, _ = self.gru(x, h0)
        # Keep only the final time step before the fully connected head.
        out = out[:, -1, :]
        return self.fc(out)
def get_model(model, model_params):
    """Instantiate one of the recurrent models by name.

    Parameters
    ----------
    model : str
        'rnn', 'lstm' or 'gru' (case-insensitive).
    model_params : dict
        Keyword arguments forwarded to the model constructor.

    Raises
    ------
    ValueError
        For an unknown model name. (The original used ``dict.get`` and would
        raise an opaque ``TypeError: 'NoneType' object is not callable``.)
    """
    models = {
        "rnn": RNNModel,
        "lstm": LSTMModel,
        "gru": GRUModel,
    }
    try:
        return models[model.lower()](**model_params)
    except KeyError:
        raise ValueError(
            f"Unknown model '{model}'; expected one of {sorted(models)}"
        ) from None
class Optimization:
    """Bundles a model, loss function and optimizer with training,
    evaluation and loss-plotting helpers.

    Relies on the module-level ``device`` for tensor placement.
    """

    def __init__(self, model, loss_fn, optimizer):
        self.model = model
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.train_losses = []  # mean training loss per epoch
        self.val_losses = []    # mean validation loss per epoch

    def train_step(self, x, y):
        """Run one optimization step on a single batch; return the loss value."""
        self.model.train()
        yhat = self.model(x)
        loss = self.loss_fn(y, yhat)
        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
        return loss.item()

    def train(self, train_loader, val_loader, batch_size=64, n_epochs=50, n_features=1):
        """Train for ``n_epochs``, recording mean train/val loss per epoch.

        Each batch is reshaped to (batch_size, seq_len, n_features); the
        loaders must use drop_last=True so every batch is exactly full.
        The final state_dict is saved to ``model_path`` when done.
        """
        model_path = 'test'
        # model_path = f'models/{self.model}_{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}'
        # Fix: move the model to the same device as the batches; the original
        # moved only the data, which fails on CUDA machines (no-op on CPU).
        self.model.to(device)
        for epoch in range(1, n_epochs + 1):
            batch_losses = []
            for x_batch, y_batch in train_loader:
                x_batch = x_batch.view([batch_size, -1, n_features]).to(device)
                y_batch = y_batch.to(device)
                loss = self.train_step(x_batch, y_batch)
                batch_losses.append(loss)
            training_loss = np.mean(batch_losses)
            self.train_losses.append(training_loss)
            # Validation pass — no gradients needed.
            with torch.no_grad():
                batch_val_losses = []
                for x_val, y_val in val_loader:
                    x_val = x_val.view([batch_size, -1, n_features]).to(device)
                    y_val = y_val.to(device)
                    self.model.eval()
                    yhat = self.model(x_val)
                    val_loss = self.loss_fn(y_val, yhat).item()
                    batch_val_losses.append(val_loss)
                validation_loss = np.mean(batch_val_losses)
                self.val_losses.append(validation_loss)
            # Log the first 10 epochs, then every 50th.
            if (epoch <= 10) | (epoch % 50 == 0):
                print(
                    f"[{epoch}/{n_epochs}] Training loss: {training_loss:.4f}\t Validation loss: {validation_loss:.4f}"
                )
        torch.save(self.model.state_dict(), model_path)

    def evaluate(self, test_loader, batch_size=1, n_features=1):
        """Predict over ``test_loader``.

        Returns (predictions, values): two lists of numpy arrays, one entry
        per batch.
        """
        with torch.no_grad():
            predictions = []
            values = []
            for x_test, y_test in test_loader:
                x_test = x_test.view([batch_size, -1, n_features]).to(device)
                y_test = y_test.to(device)
                self.model.eval()
                yhat = self.model(x_test)
                # Fix: tensors must be on the CPU before .numpy(); the
                # original called `.to(device)` here, which crashes when the
                # device is CUDA (identical behavior on CPU).
                predictions.append(yhat.detach().cpu().numpy())
                values.append(y_test.detach().cpu().numpy())
        return predictions, values

    def plot_losses(self):
        """Plot the recorded per-epoch training and validation losses."""
        plt.plot(self.train_losses, label="Training loss")
        plt.plot(self.val_losses, label="Validation loss")
        plt.legend()
        plt.title("Losses")
        plt.show()
        plt.close()
# Use a GPU when available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
import torch.optim as optim
import datetime
# Hyperparameters. input_dim is the number of feature columns; the training
# loop's `.view([batch_size, -1, n_features])` then yields sequences of
# length 1 per sample.
input_dim = len(X_train.columns)
output_dim = 1
hidden_dim = 64
layer_dim = 3
batch_size = 64
dropout = 0.2
n_epochs = 10
learning_rate = 1e-3
weight_decay = 1e-6
model_params = {'input_dim': input_dim,
                'hidden_dim' : hidden_dim,
                'layer_dim' : layer_dim,
                'output_dim' : output_dim,
                'dropout_prob' : dropout}
model = get_model('lstm', model_params)
loss_fn = nn.MSELoss(reduction="mean")
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
opt = Optimization(model=model, loss_fn=loss_fn, optimizer=optimizer)
opt.train(train_loader, val_loader, batch_size=batch_size, n_epochs=n_epochs, n_features=input_dim)
opt.plot_losses()
# Per-sample predictions (batch_size=1) for the test split.
predictions, values = opt.evaluate(test_loader_one, batch_size=1, n_features=input_dim)
def inverse_transform(scaler, df, columns):
    """Fit ``scaler`` on each column group in ``columns`` and overwrite the
    group with its inverse transform; mutates and returns ``df``.

    NOTE(review): each `col` here is fit on data that is already in scaled
    space and then inverse-transformed with those same statistics — close to
    an identity mapping, not a recovery of the original megawatt scale.
    Recovering real units would need the scaler fitted on y_train at scaling
    time. Confirm this behavior is intended before trusting the metrics.
    """
    for col in columns:
        scaler.fit(df[col])
        df[col] = scaler.inverse_transform(df[col])
    return df
def format_predictions(predictions, values, df_test, scaler):
    """Assemble per-batch prediction/target arrays into one sorted DataFrame.

    The frame is indexed by the first len(values) timestamps of ``df_test``
    and has two columns, 'value' (actuals) and 'prediction', both passed
    through ``inverse_transform`` with the supplied scaler.
    """
    actual = np.concatenate(values, axis=0).ravel()
    predicted = np.concatenate(predictions, axis=0).ravel()
    frame = pd.DataFrame(
        data={"value": actual, "prediction": predicted},
        index=df_test.head(len(actual)).index,
    )
    frame = frame.sort_index()
    # The nested list makes inverse_transform handle both columns together
    # as a single 2-D block.
    return inverse_transform(scaler, frame, [["value", "prediction"]])
df_result = format_predictions(predictions, values, X_test, scaler)
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
def calculate_metrics(df):
    """Return MAE, RMSE and R² comparing ``df.prediction`` against ``df.value``."""
    actual = df.value
    predicted = df.prediction
    return {
        'mae': mean_absolute_error(actual, predicted),
        'rmse': mean_squared_error(actual, predicted) ** 0.5,
        'r2': r2_score(actual, predicted),
    }
result_metrics = calculate_metrics(df_result)
from sklearn.linear_model import LinearRegression
def build_baseline_model(df, test_ratio, target_col):
    """Fit a plain linear regression as a baseline forecaster.

    Splits chronologically (no shuffling), fits on the training portion and
    returns a sorted DataFrame holding the test targets plus a 'prediction'
    column.
    """
    X, y = feature_label_split(df, target_col)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_ratio, shuffle=False
    )
    baseline = LinearRegression()
    baseline.fit(X_train, y_train)
    predicted = baseline.predict(X_test)
    result = pd.DataFrame(y_test)
    result["prediction"] = predicted
    return result.sort_index()
# Linear-regression baseline on the same 80/20 chronological split.
df_baseline = build_baseline_model(df_features, 0.2, 'value')
baseline_metrics = calculate_metrics(df_baseline)
import plotly.offline as pyo
def plot_predictions(df_result, df_baseline):
    """Plot actual values against model predictions with plotly.

    ``df_baseline`` is accepted for interface compatibility; the baseline
    trace is not currently drawn.
    """
    actual_trace = go.Scatter(
        x=df_result.index,
        y=df_result.value,
        mode="lines",
        name="values",
        marker=dict(),
        text=df_result.index,
        line=dict(color="rgba(0,0,0, 0.3)"),
    )
    prediction_trace = go.Scatter(
        x=df_result.index,
        y=df_result.prediction,
        mode="lines",
        line={"dash": "dot"},
        name='predictions',
        marker=dict(),
        text=df_result.index,
        opacity=0.8,
    )
    layout = dict(
        title="Predictions vs Actual Values for the dataset",
        xaxis=dict(title="Time", ticklen=5, zeroline=False),
        yaxis=dict(title="Value", ticklen=5, zeroline=False),
    )
    iplot(dict(data=[actual_trace, prediction_trace], layout=layout))
# Enable plotly's offline notebook mode, then draw the final comparison plot.
pyo.init_notebook_mode()
plot_predictions(df_result, df_baseline)